* Session setup: matrix-size limit, working directory, and input data.
set matsize 11000

* ) Insert directory
cd ""
use "Author-paper-referee level data", clear

***************************************

* ) Top-coding distance and include missing
* Both distance measures are capped at 6; missing distances are also coded 6,
* so category 6 pools "more than 6" with "missing".
foreach dist in referee_distance editor_distance {
    replace `dist' = 6 if `dist' > 6 | missing(`dist')
}

* One 0/1 indicator per distinct value of referee_distance, named refdegree1,
* refdegree2, ... in ascending order of the values present in the data.
* NOTE(review): the code below refers to refdegree1-refdegree6, so it assumes
* exactly six distinct values exist after top-coding -- confirm against the data.
tabulate referee_distance, generate(refdegree)

* max`deg' is constant within referee_id: it equals 1 when the referee has at
* least one observation with refdegree`deg' == 1, and 0 otherwise.
forvalues deg = 1/6 {
    bysort referee_id: egen max`deg' = max(refdegree`deg')
}
order referee_id, after(max6)

* ref_ij = 1 when the same referee appears at both degree i and degree j
* (i < j). Variables are created in the order ref_12, ref_13, ..., ref_56.
forvalues lo = 1/5 {
    local first_hi = `lo' + 1
    forvalues hi = `first_hi'/6 {
        gen ref_`lo'`hi' = (max`lo' == 1 & max`hi' == 1)
    }
}

* ) Can collapse to the reviewer level now
* Count how many referees fall into each degree pair. The ref_* flags are
* constant within referee_id, so any single row per referee carries them.
* The data in memory are restored afterwards; only the listing is produced.
preserve
tempvar one_per_referee
* missing: rows with a missing referee_id still form a group and get tagged
egen `one_per_referee' = tag(referee_id), missing
keep if `one_per_referee' == 1
collapse (sum) ref_*
list
restore

* ) Calculate each referee's RR rate across different degrees
* meanRR`deg' holds the referee's mean of RR over the observations where
* refdegree`deg' == 1; it is set to missing on all other observations.
forvalues deg = 1/6 {
    egen meanRR`deg' = mean(RR), by(referee_id refdegree`deg')
    replace meanRR`deg' = . if refdegree`deg' == 0
    summarize meanRR`deg'
}

* ) Calculate different RR rate across degrees for each referee type
order referee_id, after(ref_56)

* Reduce to one row per referee. meanRR* is either constant or missing within
* a referee, and the ref_* flags are constant within a referee, so the mean
* simply carries each referee's value forward.
collapse (mean) meanRR*, by(referee_id ref_*)

* Within-referee difference in mean RR for every degree pair i < j.
* diff_ij is missing unless the referee has observations at both degrees.
forvalues i = 1/5 {
    local j_start = `i' + 1
    forvalues j = `j_start'/6 {
        gen diff_`i'`j' = meanRR`i' - meanRR`j'
    }
}

* ) Mean differences in RR rates, each referee gets one observation
* collapse (mean) ignores missing values, so each diff_ij is averaged only
* over referees observed at both degree i and degree j.
collapse (mean) diff_*

* Use an explicit wildcard for every group: a bare "diff_5" only resolves to
* diff_56 through variable-name abbreviation and fails under
* "set varabbrev off".
order diff_1* diff_2* diff_3* diff_4* diff_5*
list
 
